{
  "nbformat": 4,
  "nbformat_minor": 2,
  "metadata": {
    "kernelspec": {
      "language": "python",
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "version": "3.6.4",
      "file_extension": ".py",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "name": "python",
      "mimetype": "text/x-python"
    },
    "colab": {
      "name": "simpsons.ipynb",
      "provenance": []
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Installing `caer` and `canaro` since they don't come pre-installed.\n",
        "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
        "# TODO: pin the versions this notebook was developed against for reproducible runs.\n",
        "%pip install --upgrade caer canaro"
      ],
      "outputs": [],
      "metadata": {
        "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
        "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
        "trusted": true,
        "id": "MQoxjirY4S5o"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "import os\n",
        "import caer\n",
        "import canaro\n",
        "import numpy as np\n",
        "import cv2 as cv\n",
        "import gc\n",
        "#pylint:disable=no-member (Removes linting problems with cv)"
      ],
      "outputs": [],
      "metadata": {
        "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
        "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
        "trusted": true,
        "id": "_yldS7bG4S58"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Notebook-wide settings\n",
        "IMG_SIZE = (80, 80)  # target size used when preparing images\n",
        "channels = 1  # number of image channels passed to caer and to the model\n",
        "# Root folder of the dataset; its entries are listed per character further down\n",
        "char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "TuC64gTq4S6K"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Creating a character dictionary: folder name -> number of files in that folder.\n",
        "# Non-directory entries in the dataset root are skipped, because calling\n",
        "# os.listdir() on a plain file would raise NotADirectoryError.\n",
        "char_dict = {}\n",
        "for char in os.listdir(char_path):\n",
        "    char_dir = os.path.join(char_path, char)\n",
        "    if os.path.isdir(char_dir):\n",
        "        char_dict[char] = len(os.listdir(char_dir))\n",
        "\n",
        "# Sort in descending order\n",
        "char_dict = caer.sort_dict(char_dict, descending=True)\n",
        "char_dict"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "RD5OHUE84S6U"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Keep the first element of each of the first 10 entries of the sorted `char_dict`.\n",
        "# Zipping against range(10) stops after ten entries (or earlier if there are fewer).\n",
        "characters = [entry[0] for _, entry in zip(range(10), char_dict)]\n",
        "characters"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "OQ09DqmI4S6g"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Build the training data from the folders of the selected characters\n",
        "train = caer.preprocess_from_dir(\n",
        "    char_path,\n",
        "    characters,\n",
        "    channels=channels,\n",
        "    IMG_SIZE=IMG_SIZE,\n",
        "    isShuffle=True,\n",
        ")"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "X4UnTWk74S6q"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Number of training samples\n",
        "# (the number of entries returned by caer.preprocess_from_dir)\n",
        "len(train)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "ZaSuzC2J4S6z"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Show the first training image with matplotlib\n",
        "# (OpenCV doesn't display well in Jupyter notebooks)\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(30, 30))\n",
        "ax.imshow(train[0][0], cmap='gray')\n",
        "plt.show()"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "hSw-V2H24S7A"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Separating the array and corresponding labels\n",
        "# NOTE(review): `train` is presumably a sequence of (image, label) pairs\n",
        "# (train[0][0] is displayed as an image earlier) -- confirm against the caer docs.\n",
        "featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "arO-90034S7J"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "from tensorflow.keras.utils import to_categorical\n",
        "\n",
        "# Turn the numerical labels into binary class vectors, one column per selected character\n",
        "labels = to_categorical(labels, num_classes=len(characters))\n",
        "\n",
        "# Normalize the featureSet ==> (0,1)\n",
        "featureSet = caer.normalize(featureSet)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "Sl8VnLCY4S7O"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Split features and labels into training and validation sets\n",
        "# (val_ratio=.2 presumably holds out 20% for validation -- confirm in the caer docs)\n",
        "x_train, x_val, y_train, y_val = caer.train_test_split(featureSet, labels, val_ratio=.2)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "pzXXrqbt4S7S"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Drop the intermediate arrays now that the train/validation splits exist,\n",
        "# then ask the garbage collector to reclaim the memory\n",
        "del train, featureSet, labels\n",
        "gc.collect()"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "emsrpYWZ4S7W"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Training hyper-parameters\n",
        "EPOCHS = 10  # number of training epochs\n",
        "BATCH_SIZE = 32  # samples per batch drawn from the generator"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "NkS1ceD94S7a"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Image data generator (introduces randomness in network ==> better accuracy)\n",
        "# NOTE(review): presumably this applies random augmentation to each batch -- confirm in the canaro docs.\n",
        "datagen = canaro.generators.imageDataGenerator()\n",
        "# Batches of BATCH_SIZE samples drawn from the training split\n",
        "train_gen = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "_atEyygG4S7g"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Create our model (returns a compiled model).\n",
        "# The labels are one-hot vectors over len(characters) mutually exclusive classes, so the\n",
        "# multi-class loss is used. With 'binary_crossentropy' every output would be scored as an\n",
        "# independent yes/no decision, which can also inflate the reported accuracy.\n",
        "model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE, channels=channels, output_dim=len(characters),\n",
        "                                         loss='categorical_crossentropy', decay=1e-7, learning_rate=0.001, momentum=0.9,\n",
        "                                         nesterov=True)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "Y8fjXBuH4S7m"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Print a summary of the model created above\n",
        "model.summary()"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "CepcT54J4S7t"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Training the model\n",
        "\n",
        "from tensorflow.keras.callbacks import LearningRateScheduler\n",
        "\n",
        "# Learning rate is set each epoch by canaro's schedule function\n",
        "callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]\n",
        "\n",
        "# `validation_steps` is intentionally not passed: the validation data are in-memory arrays,\n",
        "# so Keras evaluates all of them every epoch. A floor-divided step count would silently\n",
        "# leave the final partial batch out of the validation metrics.\n",
        "training = model.fit(train_gen,\n",
        "                    steps_per_epoch=len(x_train)//BATCH_SIZE,\n",
        "                    epochs=EPOCHS,\n",
        "                    validation_data=(x_val,y_val),\n",
        "                    callbacks = callbacks_list)"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "AknG90ch4S7-"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Class names; a predicted class index is looked up in this list below\n",
        "characters"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "CUifBqOG4S8G"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Testing"
      ],
      "metadata": {
        "id": "qiNY_zSy6Quj"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'\n",
        "\n",
        "# cv.imread returns None (it does not raise) when the file is missing or unreadable,\n",
        "# so fail here with a clear message instead of deep inside a later cell.\n",
        "img = cv.imread(test_path)\n",
        "if img is None:\n",
        "    raise FileNotFoundError(f'Could not read image: {test_path}')\n",
        "\n",
        "# OpenCV loads images in BGR channel order while matplotlib expects RGB.\n",
        "# Convert for display only; `img` itself stays BGR for the preprocessing step.\n",
        "plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))\n",
        "plt.show()"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "ETnmB3DC4S8M"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "def prepare(image):\n",
        "    \"\"\"Preprocess a BGR image for `model.predict`.\n",
        "\n",
        "    Converts to grayscale, resizes to IMG_SIZE, reshapes with caer.reshape and\n",
        "    scales with caer.normalize. The training features were passed through\n",
        "    caer.normalize, so inference inputs must be put on the same scale.\n",
        "\n",
        "    Args:\n",
        "        image: BGR image array, e.g. as returned by cv.imread.\n",
        "\n",
        "    Returns:\n",
        "        The preprocessed array to pass to the model.\n",
        "    \"\"\"\n",
        "    image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)\n",
        "    image = cv.resize(image, IMG_SIZE)\n",
        "    image = caer.reshape(image, IMG_SIZE, 1)\n",
        "    # Same scaling that was applied to featureSet before training\n",
        "    image = caer.normalize(image)\n",
        "    return image"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "yJjmMuvj4S8T"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Run the trained model on the preprocessed test image\n",
        "predictions = model.predict(prepare(img))"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "hZTjUKKn4S8a"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Index of the highest score in the first prediction row, mapped back to a character name\n",
        "best_idx = np.argmax(predictions[0])\n",
        "print(characters[best_idx])"
      ],
      "outputs": [],
      "metadata": {
        "trusted": true,
        "id": "3a4AW8qT4S8g"
      }
    }
  ]
}